# -*- coding: utf-8 -*-
__author__ = 'qh'

import time
import pandas as pd
import numpy as np
import os

'''
加载数据
'''


def userStat():
    """Load the sampled user behaviour log.

    The file is looked up relative to the current working directory: it is
    read from ``data/sample_log.csv`` inside the *parent* of ``os.getcwd()``.

    Returns:
        pandas.DataFrame: the contents of ``sample_log.csv`` as parsed by
        ``pd.read_csv``.
    """
    parent_path = os.path.dirname(os.getcwd())

    # Build the path with os.path.join rather than appending a literal with
    # hard-coded backslashes: that form only resolves on Windows and relies
    # on invalid string escapes being passed through unchanged.
    log_path = os.path.join(parent_path, 'data', 'sample_log.csv')

    # NOTE: sample_info.csv used to be parsed here as well, but the result
    # was never used or returned, so that read has been dropped.
    return pd.read_csv(log_path)


'''
提取特征
'''


def getFeature(data=None):
    """Build the per-user feature matrix from the behaviour log.

    Args:
        data: optional DataFrame with at least the columns ``user_id``,
            ``action_type`` and ``time_stamp``. When omitted, the log is
            loaded with ``userStat()``.

    Returns:
        numpy.ndarray: float array of shape ``(number of users, 80)``. Rows
        follow the iteration order of ``data.groupby('user_id')``. Columns:

        * 0-3: number of rows with ``action_type`` 0, 1, 2 and 3
          (click, buy, collect, add-to-cart).
        * 4-7: each of those counts divided by their sum; left at 0 when
          the user has no rows with action types 0-3.
        * 8: number of distinct ``time_stamp`` values for the user.
        * 9-79: not filled in yet, always 0.
    """
    if data is None:
        data = userStat()

    # Group once and reuse it for both the row count and the iteration.
    grouped = data.groupby('user_id')
    user_count = len(grouped)
    print(user_count)
    feature = np.zeros((user_count, 80))

    # enumerate() advances the row index per user; previously the index was
    # never incremented, so every user overwrote row 0.
    for index, (_user_id, userData) in enumerate(grouped):
        action_types = userData.action_type.values

        # Columns 0-3: click / buy / collect / cart counts.
        for action in range(4):
            feature[index, action] = np.count_nonzero(action_types == action)

        # Columns 4-7: share of each action type. Skip the division when
        # there is nothing to divide by, so no NaN/inf leaks into the matrix.
        total = feature[index, 0:4].sum()
        if total > 0:
            feature[index, 4:8] = feature[index, 0:4] / total

        # Column 8: number of distinct time stamps the user was active on.
        feature[index, 8] = np.unique(userData.time_stamp.values).shape[0]

    # TODO: columns 9-79 are reserved. An earlier row-by-row draft (removed
    # from this function, see version control) filled them with: buy days
    # and buy/online ratio (9-10), browsing before a purchase (11-13),
    # total and weighted action counts (14-16), visit-interval statistics
    # via arrayInterval (17-19), distinct item/category/brand/seller counts
    # per action type (20-35), weekday vs. weekend activity and purchases
    # (36-47), first/last interaction offsets (48-49) and per-day count
    # statistics for all days and for purchase days (50-79).

    return feature


# Compute the intervals between consecutive array elements.
def arrayInterval(arr):
    """Return the differences between each pair of neighbouring elements.

    Args:
        arr: a sized, sliceable sequence whose elements support subtraction.

    Returns:
        numpy.ndarray: ``arr[k + 1] - arr[k]`` for every adjacent pair, or
        ``array([0])`` when ``arr`` has fewer than two elements.
    """
    if len(arr) > 1:
        earlier = arr[:-1]
        later = arr[1:]
        gaps = [nxt - prev for prev, nxt in zip(earlier, later)]
        return np.array(gaps)
    # Zero or one element: there is no pair to subtract.
    return np.array([0])


# Script driver: runs at import/execution time. Times one full call to
# getFeature() and reports the result.
t0 = time.time()
feature = getFeature()
# Elapsed wall-clock seconds spent inside getFeature().
pretime = time.time() - t0
print("time :  %.3f s" % pretime)
# Show the first row of the returned feature matrix.
print (feature[0, :])
